########################################################################################
### Replication Code                                                                 ###
### Title: Overestimation of the Level of Democracy among Citizens in Nondemocracies ###
### Author: Eddy S. F. Yeung                                                         ###
### Version: March 5, 2022                                                           ###
########################################################################################

### Set-up ###
# RStudio version 1.3.959 (macOS)
# Root folder of the replication package. Every input and output path used in
# this section is derived from it, so relocating the package means editing
# this one line only.
project_dir <- "~/Desktop/CPS_replication"
figures_dir <- file.path(project_dir, "figures")

# Save figures in a "figures" folder: the ggsave() calls further down use bare
# file names, so the working directory decides where the PDFs are written.
# The folder is created on first use (non-recursively, so a mistyped project
# root still fails loudly at setwd() instead of creating a stray tree).
# The workspace is deliberately not wiped here: the script assigns every object
# before using it, so it has no need to delete the caller's own objects.
dir.create(figures_dir, showWarnings = FALSE)
setwd(figures_dir)

# Load the required packages
library(haven)     # version 2.4.3
library(dplyr)     # version 1.0.7
library(ggplot2)   # version 3.3.5
library(ggrepel)   # version 0.9.1
library(gridExtra) # version 2.3
library(extrafont) # version 0.17

# Import the datasets
df <- read_dta(file.path(project_dir, "dataset_cleaned.dta"))
country_label <- read.csv(file.path(project_dir, "country-label.csv"))

### Figure 1 ###
# Remove nonrespondents to the question on the perceived level of democracy
# (codes -1 to -5). subset() also drops rows whose V141 is NA, because an NA
# condition is treated as FALSE.
df <- subset(df, V141 != -1 & V141 != -2 & V141 != -3 & V141 != -4 & V141 != -5)

# Transform the range of perceived levels of democracy to 0 to 1
# (a response of 1 maps to 0 and a response of 10 maps to 1)
df$V141_tran <- (df$V141 - 10) / 9 + 1

# Derive the democracy levels by country. The summary columns are named where
# they are computed instead of being renamed by position afterwards.
df.final1 <- df %>%
  group_by(name) %>%
  summarize(perceived_lvl = mean(V141_tran),
            measured_lvl = mean(v2x_polyarchy))

# Get the country labels for graph plotting
# (merge() keeps only the countries present in both tables)
df_fig_1 <- merge(df.final1, country_label, by = "name")

# Plot Figure 1
# The label mapping lives only in the text layer. Passing
# `label = country_label` to ggplot() outside aes() would hand it the
# country_label data frame, which ggplot() silently ignores.
# Inside aes(), `country_label` is looked up in df_fig_1 first
# (presumably a column supplied by country-label.csv -- confirm).
fig_1 <-
  ggplot(df_fig_1, aes(x = measured_lvl, y = perceived_lvl)) +
  geom_point(shape = 20, size = 3) +
  # A fixed seed makes the repelled label positions identical on every run
  geom_text_repel(aes(label = country_label),
                  size = 5, family = "Times", seed = 2022) +
  xlim(0, 1.05) +
  ylim(0, 1.05) +
  labs(x = "V-Dem Index (v2x_polyarchy)",
       y = "Perceived Level of Democracy") +
  # 45-degree line: points above it have a perceived level that exceeds the
  # measured one
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  annotate(geom = "text", x = 0.20, y = 0.90, label = "italic(Overestimation)",
           parse = TRUE, size = 6, family = "Times") +
  annotate(geom = "text", x = 0.85, y = 0.15, label = "italic(Underestimation)",
           parse = TRUE, size = 6, family = "Times") +
  theme_bw() +
  theme(text = element_text(family = "Times", size = 16),
        axis.text = element_text(color = "black"))
ggsave("Figure 1.pdf", plot = fig_1, width = 7, height = 6.5)

### Figure 2 ###
# Keep respondents whose country has v2x_regime equal to 0 or 1
# (presumably the two autocracy categories of the V-Dem regime measure --
# confirm against the codebook). Rows with a missing v2x_regime are dropped.
df_autocracy_only <- subset(df, v2x_regime == 0 | v2x_regime == 1)

# Transform the range of perceived levels of democracy to 0 to 1
# (a response of 1 maps to 0 and a response of 10 maps to 1)
df_autocracy_only$V141_tran <- (df_autocracy_only$V141 - 10) / 9 + 1

# Derive the democracy levels by country. The summary columns are named where
# they are computed instead of being renamed by position afterwards.
df.final2 <- df_autocracy_only %>%
  group_by(name) %>%
  summarize(perceived_lvl = mean(V141_tran),
            measured_lvl = mean(v2x_polyarchy))

# Get the country labels for graph plotting
# (merge() keeps only the countries present in both tables)
df_fig_2 <- merge(df.final2, country_label, by = "name")

# Plot Figure 2
# The label mapping lives only in the text layer. Passing
# `label = country_label` to ggplot() outside aes() would hand it the
# country_label data frame, which ggplot() silently ignores.
fig_2 <-
  ggplot(df_fig_2, aes(x = measured_lvl, y = perceived_lvl)) +
  geom_point(shape = 20, size = 3) +
  # Linear fit across countries, drawn over the full axis range and without a
  # confidence band
  stat_smooth(method = "lm", formula = y ~ x, se = FALSE,
              fullrange = TRUE, linetype = 6, color = "black") +
  # A fixed seed makes the repelled label positions identical on every run
  geom_text_repel(aes(label = country_label),
                  size = 5, family = "Times", seed = 2022) +
  xlim(0, 1) +
  ylim(0, 1) +
  labs(x = "V-Dem Index (v2x_polyarchy)",
       y = "Perceived Level of Democracy") +
  theme_bw() +
  theme(text = element_text(family = "Times", size = 16),
        axis.text = element_text(color = "black"))
ggsave("Figure 2.pdf", plot = fig_2, width = 7, height = 6.5)

### Figure 4 ###
# Drop Hong Kong respondents as HKG is not analyzed in the main text
df_autocracy_only <- subset(df_autocracy_only, code != 344)

# Survey items on the electoral process that form the four panels (A-D) of
# Figures 4, S8, S9, and S10, with the title shown on each panel's y-axis.
# All four are answered on the scale 1 = very often; 4 = not at all often.
election_items <- c(
  V228A = "Votes Are Counted Fairly",
  V228B = "Opposition Candidates Are Prevented from Running",
  V228D = "Voters Are Bribed",
  V228H = "Voters Are Threatened with Violence at the Polls"
)

# Define all non-responses (negative codes) as NA
for (item in names(election_items)) {
  responses <- df_autocracy_only[[item]]
  df_autocracy_only[[item]] <- ifelse(responses < 0, NA, responses)
}

# Reverse-code Freedom of the Press (0 = least free; 100 = most free)
df_autocracy_only$new_fotp <- 100 - df_autocracy_only$fotp

# Rescale Media System Freedom (0 = least free; 100 = most free)
df_autocracy_only$new_msf <- df_autocracy_only$MSF * 100

# Derive the media/Internet freedom levels and average responses by country.
# The summary columns are named where they are computed instead of being
# renamed by position afterwards; only the survey items skip NA values.
df.final3 <- df_autocracy_only %>%
  group_by(name) %>%
  summarize(fotp = mean(new_fotp),
            msf = mean(new_msf),
            filtering = mean(v2smgovfilprc),
            monitoring = mean(v2smgovsmmon),
            V228A = mean(V228A, na.rm = TRUE),
            V228B = mean(V228B, na.rm = TRUE),
            V228D = mean(V228D, na.rm = TRUE),
            V228H = mean(V228H, na.rm = TRUE))

# Get the country labels for graph plotting
# (merge() keeps only the countries present in both tables)
df_fig_4 <- merge(df.final3, country_label, by = "name")

# Build one country-level scatter panel with a linear fit and country labels.
#   data     : country-level data frame (one row per country)
#   x_var    : name of the column plotted on the x-axis, as a string
#   y_var    : name of the survey item on the y-axis; must be one of
#              names(election_items)
#   x_lab    : x-axis title
#   x_limits : optional c(min, max) for the x-axis; NULL lets ggplot2 choose
# Returns a ggplot object.
plot_election_panel <- function(data, x_var, y_var, x_lab, x_limits = NULL) {
  panel <-
    ggplot(data, aes(x = .data[[x_var]], y = .data[[y_var]])) +
    geom_point(shape = 20, size = 3) +
    stat_smooth(method = "lm", formula = y ~ x, se = FALSE,
                fullrange = TRUE, linetype = 5, color = "black") +
    # `country_label` is looked up in `data` first (presumably a column
    # supplied by country-label.csv -- confirm). A fixed seed makes the
    # repelled label positions identical on every run.
    geom_text_repel(aes(label = country_label),
                    size = 4, family = "Times", seed = 2022) +
    ylim(1, 4) +
    theme_bw() +
    theme(panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          axis.line = element_line(colour = "black"),
          text = element_text(family = "Times", size = 14),
          axis.text = element_text(color = "black")) +
    labs(x = x_lab,
         y = paste0(election_items[[y_var]],
                    "\n(1 = Very Often; 4 = Not at All Often)"))
  if (!is.null(x_limits)) {
    panel <- panel + xlim(x_limits[1], x_limits[2])
  }
  panel
}

# Combine the four survey-item panels for one x variable into a 2 x 2 figure
# (A and B in the top row, C and D in the bottom row). grid.arrange() also
# draws the figure on the current graphics device. Returns the arranged
# figure, ready to be passed to ggsave().
arrange_election_panels <- function(data, x_var, x_lab, x_limits = NULL) {
  panels <- lapply(names(election_items), function(y_var) {
    plot_election_panel(data, x_var, y_var, x_lab, x_limits)
  })
  grid.arrange(grobs = panels, ncol = 2, nrow = 2)
}

# Plot Figure 4 (Freedom of the Press and views on the electoral process)
fig_4 <- arrange_election_panels(
  df_fig_4, "fotp",
  "Freedom of the Press (0 = Least Free; 100 = Most Free)",
  x_limits = c(0, 100)
)
ggsave("Figure 4.pdf", plot = fig_4, width = 10, height = 10)

### Figure S8 ###
# Plot Figure S8 (Media System Freedom and views on the electoral process)
fig_S8 <- arrange_election_panels(
  df_fig_4, "msf",
  "Media System Freedom (0 = Least Free; 100 = Most Free)",
  x_limits = c(0, 100)
)
ggsave("Figure S8.pdf", plot = fig_S8, width = 10.5, height = 10.5)

### Figure S9 ###
# Plot Figure S9 (Internet filtering and views on the electoral process)
# No x-axis limits are imposed; ggplot2 picks them from the data.
fig_S9 <- arrange_election_panels(
  df_fig_4, "filtering",
  "Internet Filtering (v2smgovfilprc)"
)
ggsave("Figure S9.pdf", plot = fig_S9, width = 10, height = 10)

### Figure S10 ###
# Plot Figure S10 (Social media monitoring and views on the electoral process)
# No x-axis limits are imposed; ggplot2 picks them from the data.
fig_S10 <- arrange_election_panels(
  df_fig_4, "monitoring",
  "Social Media Monitoring (v2smgovsmmon)"
)
ggsave("Figure S10.pdf", plot = fig_S10, width = 10, height = 10)
